This document walks through some of our findings as well as the steps
I took to get there. If you’re feeling very adventurous, you can click
on the “code” buttons to the right for a tutorial on how to do my job,
or you can leave them be and just pre
### Gathering the data
We start by figuring out which organizations are in Louisville. We’ll use data from the IRS Business Master File (downloaded 6/9/2022). This document contains a full listing of tax-exempt organizations across the county, as well as some basic financial info (revenue, expenses, and assets).
To subset the data to the Louisville area, we start by filtering the data to organizations with a zip code within the Louisville MSA. Zip codes cross county and MSA boundaries, so some of these organizations lie outside the MSA, and we’ll need to remove them.
About 15% of organizations have a PO box as their primary address. We include them in our list if more than 50% of their zip code is within the Louisville MSA. (We calculate the percent of the zip code in the Louisville MSA as the percent of the businesses in that zip code that are in the Louisville MSA.) For those in the Louisville MSA, we assign them to the county which contains the largest amount of that zip code.
The remaining 85% of businesses have a street address. For these organizations, we find the organization’s longitude and latitude using the Census Bureau’s address geocoding service, and we fill in missing values using the private service Geocodio. We then compare their latitude and longitude to a map of counties in the Louisville are to decide whether they are in the Louisville MSA and to decide which county they are located in.
While we’re working with our map data, we also assign nonprofits to neighborhoods and Metro Council Districts. As mentioned before, zip codes are included in the original dataset.
The map below shows the original list, as defined by zip codes. We remove the organizations represented by the blue dots, keeping only those in grey within the Louisville MSA boundaries. You can mouse over the list to see the organizations’ names.
# Read in IRS Business Master files and combine into one data frame
bmf1 <- read_csv("eo1.csv", col_types = "cccccccncnncnncnnnnnnnnnnncc")
bmf2 <- read_csv("eo2.csv", col_types = "cccccccncnncnncnnnnnnnnnnncc")
bmf3 <- read_csv("eo3.csv", col_types = "cccccccncnncnncnnnnnnnnnnncc")
bmf4 <- read_csv("eo4.csv", col_types = "cccccccncnncnncnnnnnnnnnnncc")
bmf <- bind_rows(bmf1, bmf2, bmf3, bmf4)
rm(bmf1, bmf2, bmf3, bmf4)
ntee_categories <- read_csv("NTEE categories.csv", col_types = "ccc")
ntee_codes <- read_csv("NTEE codes.csv", col_types = "ccc")
subsection_codes <- read_csv("Subsection codes.csv", col_types = "ccc")
ntee_to_cnpe <- read_csv("CNPE to NTEE.csv", col_types = "ccc")
# https://github.com/Nonprofit-Open-Data-Collective/irs-exempt-org-business-master-file#activity-codes
activity_to_ntee <- readxl::read_excel("Activity to NTEE.xlsx", sheet = 2)
ntee_categories %<>%
rename(
NTEE_cat = `NTEE Code`) %>%
left_join(ntee_to_cnpe, by = c("NTEE_cat" = "NTEE Code", "Description" = "NTEE"))
ntee_codes %<>%
rename(
NTEE_code = `NTEE Code`)
activity_to_ntee %<>%
transmute(
ACTIVITY = str_pad(ACTIVITY, 3, "left", "0"),
NTEE_replacement = if_else(!is.na(NTEE_replacement), NTEE_replacement, NTEE))
bmf %<>%
mutate(
EIN,
name = NAME,
street = STREET,
city = CITY,
state = STATE,
zip = str_sub(ZIP, 1, 5),
tax_year = str_sub(TAX_PERIOD, 1, 4) %>% as.numeric(),
creation = RULING,
filing_type = if_else(PF_FILING_REQ_CD == 1, "990PF",
if_else(FILING_REQ_CD %in% c(1, 3, 4), "990",
if_else(FILING_REQ_CD %in% c(2), "990N",
"other"))),
revenue = REVENUE_AMT,
assets = ASSET_AMT,
activity_codes = ACTIVITY,
NTEE_code = NTEE_CD,
NTEE_cat = str_sub(NTEE_CD, 1, 1))
# MSA_zip_crosswalk <- glptools::MSA_zip %>%
# filter(MSA == "31140") %>%
# mutate(business_in_county = round(business_in_county, 1))
MSA_zip_crosswalk <- glptools::MSA_zip %>%
filter(MSA == "31140")
bmf_lou <- bmf %>%
left_join(MSA_zip_crosswalk, by = "zip") %>%
filter(!is.na(MSA)) %>%
select(EIN, street, city, state, zip)
# bmf_sure <- bmf %>%
# filter(business_in_county == 100) %>%
# mutate(lou_msa = "sure")
#
# bmf_check <- bmf %>%
# filter(business_in_county > 0, business_in_county < 100) %>%
# mutate(lou_msa = "unsure")
#
# output <- bind_rows(bmf_sure, bmf_check)
#write_csv(output, "Louisville Nonprofits 6_9_2022.csv")
# Remove PO Boxes as they will not be addressed
bmf_lou_po <- bmf_lou %>%
filter(str_detect(street, "PO BOX|PO B0X"))
bmf_lou_remaining <- bmf_lou %>%
anti_join(bmf_lou_po, by = c("EIN", "street", "city", "state", "zip"))
bmf_lou_po %<>%
left_join(MSA_zip_crosswalk, by = "zip") %>%
filter(business_in_county >= 50) %>%
select(EIN, street, city, state, zip)
# Geocode using the Census Bureau geocoder
bmf_lou_census <- bmf_lou_remaining %>%
geocode(
street = street,
city = city,
state = state,
postalcode = zip,
method = "census")
bmf_lou_remaining <- bmf_lou_census %>%
filter(is.na(lat)) %>%
select(-lat, -long)
bmf_lou_census %<>%
filter(!is.na(lat))
# Geocode using the geocodio geocoder
Sys.setenv(GEOCODIO_API_KEY = "cccff3c3cc3aca633fc09ccc3901c1a861a9069")
bmf_lou_geocodio <- bmf_lou_remaining %>%
geocode(
street = street,
city = city,
state = state,
postalcode = zip,
method = "geocodio")
# Bind geocoded information and save
bmf_lou_geocoded <- bind_rows(bmf_lou_census, bmf_lou_geocodio, bmf_lou_po)
bmf_lou_geocoded %<>%
select(EIN, lat, long)
save(bmf_lou_geocoded, file = "bmf_lou_geocoded.RData")
load("bmf_lou_geocoded.RData")
# For PO boxes, assign EINS to the county that contains the majority of the zip code
bmf_lou_POs <- bmf_lou_geocoded %>%
filter(is.na(lat)) %>%
left_join(bmf, by = "EIN") %>%
select(EIN, zip) %>%
left_join(FIPS_zip_full_MSA, by = "zip") %>%
group_by(EIN) %>%
arrange(desc(business_in_county)) %>%
filter(row_number() == 1) %>%
ungroup() %>%
select(EIN, FIPS)
# For other addresses, create sf object and join with an MSA and neighborhood map
bmf_lou_addressed <- bmf_lou_geocoded %>%
filter(!is.na(lat)) %>%
st_as_sf(coords = c("long", "lat"), crs = 4326)
bmf_lou_county <- bmf_lou_addressed %>%
st_join(map_msa_lou, st_within)
bmf_lou_nh <- bmf_lou_addressed %>%
st_join(map_nh, st_within) %>%
st_drop_geometry()
bmf_lou_district <- bmf_lou_addressed %>%
st_join(map_district, st_within) %>%
st_drop_geometry()
pal <- colorFactor(palette = c("#00a9b7FF", "#333333"),
domain = c("Keep", "Drop"))
labels <- sprintf("%s",
left_join(bmf_lou_county, bmf, by = "EIN")$name) %>%
lapply(htmltools::HTML)
leaflet() %>%
addPolygons(data = map_msa_lou,
color = "black",
opacity = 1,
weight = 3,
fillOpacity = 0) %>%
addCircleMarkers(data = mutate(bmf_lou_county, keep = if_else(is.na(FIPS), "Drop", "Keep")),
radius = 2,
color = ~pal(keep),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", "font-family" = "Arial", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(pal = pal, values = c("Drop", "Keep")) %>%
addTiles() %>%
setView(-85.6, 38.2, zoom = 8)
rm(pal)
# Filter to EINs in Lou MSA and remove county polygons from the data frame
bmf_lou_county %<>%
st_drop_geometry() %>%
filter(!is.na(FIPS)) %>%
select(-county)
# Add neighborhood names to the data frame
bmf_lou_county %<>%
left_join(bmf_lou_nh, by = "EIN") %>%
left_join(bmf_lou_district, by = "EIN")
# Add PO box data
bmf_lou_county %<>%
bind_rows(bmf_lou_POs)
bmf_lou <- bmf_lou_county
bmf_lou_points <- bmf_lou_addressed %>%
filter(EIN %in% bmf_lou$EIN)
rm(bmf_lou_geocoded, bmf_lou_county, bmf_lou_nh, bmf_lou_district, bmf_lou_POs, bmf_lou_addressed)
filing_data <- any_time("org_indices", starting_year = 2020, col_types = "cccccccccn")
filing_data %<>%
filter(EIN %in% bmf_lou$EIN) %>%
mutate(OBJECT_ID = str_remove(OBJECT_ID, ","))
# Create list of XML files
files_df <- data.frame(
folder = character(),
file = character())
for(i in 1:8) {
this_folder <- "xml_files/download990xml_2020_" %p% i
these_files <- list.files(this_folder)
temp <- data.frame(folder = this_folder,
file = these_files)
files_df %<>% bind_rows(temp)
}
for(i in 1:6) {
this_folder <- "xml_files/download990xml_2021_" %p% i
these_files <- list.files(this_folder)
temp <- data.frame(folder = this_folder,
file = these_files)
files_df %<>% bind_rows(temp)
}
for(i in 1:1) {
this_folder <- "xml_files/download990xml_2022"# %p% i
these_files <- list.files(this_folder)
temp <- data.frame(folder = this_folder,
file = these_files)
files_df %<>% bind_rows(temp)
}
files_df %<>%
mutate(file_name = str_remove(file, "_public\\.xml"),
path = paste0(folder, "/", file)) %>%
select(file_name, path)
filing_data %<>% left_join(files_df, by = c("OBJECT_ID" = "file_name"))
# Pull XML files into a large list
output <- list()
for (i in 1:nrow(filing_data)){
xml_obj <- tryCatch({
XML::xmlParse(filing_data$path[i])
},
error = function(e) list("missing"))
output <- c(output, xml_obj)
print(i)
}
#test <- read_xmls("xml_files/download990xml_2022")
# Function to get certain data points from these columns
safe_get <- function(obj, cols) {
for (c in cols) {
these_cols <- names(obj) == c
if (c == "CompensationAmt") {
compensations <- flatten(obj)[names(flatten(obj)) == "CompensationAmt"]
jobs <- as.character(sum(compensations != "0"))
return(jobs)
}
if (sum(these_cols) == 1) {
obj <- obj[[c]]
} else {
obj <- obj[names(obj) == c]
obj <- map(obj, function(x) flatten(x))
}
}
if(length(obj) == 0) return(NA_character_)
obj
}
extract_fxn <- function(xml_obj) {
if (is.character(xml_obj)) return(data.frame())
df <- xml_obj %>%
xmlToList()
all_data <- df
this_EIN = safe_get(df[["ReturnHeader"]][["Filer"]], "EIN")
this_return_type = safe_get(df[["ReturnHeader"]], "ReturnTypeCd")
this_return_period = safe_get(df[["ReturnHeader"]], "TaxPeriodEndDt")
if (this_return_type == "990") {
df <- df[["ReturnData"]][["IRS990"]]
output_df <- data.frame(
EIN = this_EIN,
return_type = this_return_type,
return_period = this_return_period,
PYContributionsGrantsAmt = safe_get(df, "PYContributionsGrantsAmt"),
CYContributionsGrantsAmt = safe_get(df, "CYContributionsGrantsAmt"),
PYProgramServiceRevenueAmt = safe_get(df, "PYProgramServiceRevenueAmt"),
CYProgramServiceRevenueAmt = safe_get(df, "CYProgramServiceRevenueAmt"),
PYInvestmentIncomeAmt = safe_get(df, "PYInvestmentIncomeAmt"),
CYInvestmentIncomeAmt = safe_get(df, "CYInvestmentIncomeAmt"),
PYOtherRevenueAmt = safe_get(df, "PYOtherRevenueAmt"),
CYOtherRevenueAmt = safe_get(df, "CYOtherRevenueAmt"),
PYRevenuesLessExpensesAmt = safe_get(df, "PYRevenuesLessExpensesAmt"),
CYRevenuesLessExpensesAmt = safe_get(df, "CYRevenuesLessExpensesAmt"),
PYSalariesCompEmpBnftPaidAmt = safe_get(df, "PYSalariesCompEmpBnftPaidAmt"),
CYSalariesCompEmpBnftPaidAmt = safe_get(df, "CYSalariesCompEmpBnftPaidAmt"),
PYTotalExpensesAmt = safe_get(df, "PYTotalExpensesAmt"),
CYTotalExpensesAmt = safe_get(df, "CYTotalExpensesAmt"),
MissionDesc = safe_get(df, "MissionDesc"),
WebsiteAddressTxt = safe_get(df, "WebsiteAddressTxt"),
employees = safe_get(df, "EmployeeCnt"),
FederatedCampaignsAmt = safe_get(df, "FederatedCampaignsAmt"),
MembershipDuesAmt = safe_get(df, "MembershipDuesAmt"),
FundraisingAmt = safe_get(df, "FundraisingAmt"),
RelatedOrganizationsAmt = safe_get(df, "RelatedOrganizationsAmt"),
GovernmentGrantsAmt = safe_get(df, "GovernmentGrantsAmt"),
AllOtherContributionsAmt = safe_get(df, "AllOtherContributionsAmt"),
NonCashContributionsAmt = safe_get(df, "NonCashContributionsAmt"),
TotalContributionsAmt = safe_get(df, "TotalContributionsAmt"),
GrantsToDomesticOrgsGrp = safe_get(df, c("GrantsToDomesticOrgsGrp", "TotalAmt")),
GrantsToDomesticIndividualsGrp = safe_get(df, c("GrantsToDomesticIndividualsGrp", "TotalAmt")),
CompCurrentOfcrDirectorsGrp = safe_get(df, c("CompCurrentOfcrDirectorsGrp", "TotalAmt")),
OtherSalariesAndWagesGrp = safe_get(df, c("OtherSalariesAndWagesGrp", "TotalAmt")),
PayrollTaxesGrp = safe_get(df, c("PayrollTaxesGrp", "TotalAmt")),
FeesForServicesAccountingGrp = safe_get(df, c("FeesForServicesAccountingGrp", "TotalAmt")),
AdvertisingGrp = safe_get(df, c("AdvertisingGrp", "TotalAmt")),
OfficeExpensesGrp = safe_get(df, c("OfficeExpensesGrp", "TotalAmt")),
InformationTechnologyGrp = safe_get(df, c("InformationTechnologyGrp", "TotalAmt")),
ConferencesMeetingsGrp = safe_get(df, c("ConferencesMeetingsGrp", "TotalAmt")),
InsuranceGrp = safe_get(df, c("InsuranceGrp", "TotalAmt")),
#OtherExpensesGrp = safe_get(df, c("GrantsToDomesticOrgsGrp", "TotalAmt")),
TotalFunctionalExpensesGrp = safe_get(df, c("TotalFunctionalExpensesGrp", "TotalAmt")))
} else if (this_return_type == "990EZ") {
df <- df[["ReturnData"]][["IRS990EZ"]]
output_df <- data.frame(
EIN = this_EIN,
return_type = this_return_type,
return_period = this_return_period,
CYContributionsGrantsAmt = safe_get(df, "ContributionsGiftsGrantsEtcAmt"),
CYProgramServiceRevenueAmt = safe_get(df, "ProgramServiceRevenueAmt"),
PYRevenuesLessExpensesAmt = safe_get(df, "PYRevenuesLessExpensesAmt"),
CYRevenuesLessExpensesAmt = safe_get(df, "CYRevenuesLessExpensesAmt"),
CYSalariesCompEmpBnftPaidAmt = safe_get(df, "SalariesOtherCompEmplBnftAmt"),
CYTotalExpensesAmt = safe_get(df, "TotalExpensesAmt"),
GrantsAndSimilarAmountsPaidAmt = safe_get(df, "GrantsAndSimilarAmountsPaidAmt"),
TotalExpensesAmt = safe_get(df, "GrantsAndSimilarAmountsPaidAmt"),
MissionDesc = safe_get(df, "MissionDesc"),
WebsiteAddressTxt = safe_get(df, "WebsiteAddressTxt"),
employees = safe_get(df, c("OfficerDirectorTrusteeEmplGrp", "CompensationAmt")))
} else if (this_return_type %in% c("990PF", "990T")) {
df <- df[["ReturnData"]][["IRS990PF"]]
output_df <- data.frame(
EIN = this_EIN,
return_type = this_return_type,
return_period = this_return_period)
}
return(output_df)
}
# extract_layout <- function(xml_obj) {
#
# if (is.character(xml_obj)) return(data.frame())
#
# df <- xml_obj %>%
# xmlToList()
#
# df
# }
#
# output_layout <- map(output, extract_layout)
#
# save(output_layout, file = "output_layout.RData")
library(tictoc)
tic()
output_internal_data <- map_dfr(output, extract_fxn)
toc()
#save(output_internal_data, file = "output_internal_data2.RData")
load("output_layout.RData")
load("output_internal_data2.RData")
test = output_internal_data %>%
mutate(across(PYContributionsGrantsAmt:CYSalariesCompEmpBnftPaidAmt, as.numeric)) %>%
mutate(across(employees:TotalFunctionalExpensesGrp, as.numeric)) %>%
mutate(across(FederatedCampaignsAmt:TotalContributionsAmt, ~replace_na(., 0))) %>%
mutate(revenue_check = TotalContributionsAmt - (FederatedCampaignsAmt + MembershipDuesAmt + FundraisingAmt +
RelatedOrganizationsAmt + GovernmentGrantsAmt + AllOtherContributionsAmt + NonCashContributionsAmt) == 0)
load("output_internal_data2.RData")
df_990_2021 <- read_csv("21eoextract990.csv")
df_990ez_2021 <- read_csv("21eoextractez.csv")
test <- df_990_2021 %>%
transmute(
EIN,
tax_period = tax_pd,
employees = noemplyeesw3cnt)
df_990_2020 <- readxl::read_excel("20eoextract990.xlsx", progress = TRUE)
df_990ez_2021 <- readxl::read_excel("21eoextractez.xlsx", progress = TRUE)
df_990ez_2020 <- readxl::read_excel("20eoextractez.xlsx", progress = TRUE)
Deciding which organizations fit where is complicated, to say the least. We have a couple ways of approaching it:
bmf_lou %<>%
left_join(bmf, by = "EIN")
# Change AMVETS from subsection 91 to 19.
bmf_lou$SUBSECTION[bmf_lou$name == "AMVETS"] <- "19"
# Change Thornton to employee benefit trust
bmf_lou$SUBSECTION[bmf_lou$EIN == "611040959"] <- "09"
# Classify 501(c)(3) organizations as foundations, Churches/Religious orgs, Government entities, or charitable nonprofits
bmf_lou %<>%
left_join(subsection_codes, by = "SUBSECTION") %>%
mutate(
org_type = case_when(
SUBSECTION != "03" ~ `short name`,
PF_FILING_REQ_CD == 1 ~ "Foundation",
FILING_REQ_CD %in% c(6, 13) ~ "Church or Religious Organization",
FILING_REQ_CD == 14 ~ "Government Entities",
# other FILING_REQ_CD: 1, 2, 3
FILING_REQ_CD %in% 1:3 ~ "Charitable Nonprofit",
FOUNDATION %in% c(10) ~ "Church or Religious Organization",
FOUNDATION %in% c(4, 11, 16, 17) ~ "Foundation",
FOUNDATION %in% c(15) ~ "Government Entities"))
# Replace missing NTEE codes with an activity code to NTEE crosswalk
bmf_lou %<>%
mutate(activity_code_1 = str_sub(ACTIVITY, 1, 3)) %>%
left_join(activity_to_ntee, by = c("activity_code_1" = "ACTIVITY")) %>%
mutate(
NTEE_code = if_else(is.na(NTEE_code), NTEE_replacement, NTEE_code),
NTEE_cat = str_sub(NTEE_code, 1, 1))
# Add NTEE1 and NTEE2
bmf_lou %<>%
left_join(ntee_categories, by = "NTEE_cat") %>%
mutate(NTEE1 = Description) %>%
select(-Description, -Definition) %>%
left_join(ntee_codes, by = "NTEE_code") %>%
mutate(NTEE2 = Description)%>%
select(-Description, -Definition) %>%
mutate(NTEE2 = if_else(!is.na(NTEE1) & is.na(NTEE2), "uncategorized", NTEE2))
library(sunburstR)
library(d3r)
df <- bmf_lou_classified %>%
select(org_type, NTEE1, NTEE2, name, revenue) %>%
mutate(
org_type = str_replace(org_type, "-", " "),
NTEE1 = str_replace(NTEE1, "-", " "),
NTEE2 = str_replace(NTEE2, "-", " "),
name = str_replace(name, "-", " "),
revenue = if_else(revenue < 0, 0, revenue)) %>%
mutate(org_type = replace_na(org_type, "uncategorized"),
NTEE1 = replace_na(NTEE1, "uncategorized")) %>%
count(org_type, NTEE1, NTEE2, name) %>%
rename(size = n)
tree <- d3_nest(df, value_cols = "size")
sunburst(tree, width="100%", height=600, legend = FALSE)
sund2b(tree, width="100%")
org_type1 <- bmf_lou_classified %>%
group_by(org_type) %>%
summarize(n = n(), .groups = "drop")
org_type2 <- bmf_lou_classified %>%
filter(org_type == "Charitable Nonprofit") %>%
group_by(NTEE1) %>%
summarize(n = n(), .groups = "drop")
org_type3 <- bmf_lou_classified %>%
filter(org_type == "Charitable Nonprofit") %>%
group_by(NTEE2) %>%
summarize(n = n(), .groups = "drop") %>%
mutate(NTEE_cat = str_sub(NTEE2, 1, 1)) %>%
left_join(ntee_categories, by = "NTEE_cat")
org_type1 <- bmf_lou_classified %>%
group_by(org_type) %>%
summarize(n = sum(revenue, na.rm = TRUE), .groups = "drop")
org_type2 <- bmf_lou_classified %>%
filter(org_type == "Charitable Nonprofit") %>%
group_by(NTEE1) %>%
summarize(n = sum(revenue, na.rm = TRUE), .groups = "drop")
org_type3 <- bmf_lou_classified %>%
filter(org_type == "Charitable Nonprofit") %>%
group_by(NTEE2) %>%
summarize(n = sum(revenue, na.rm = TRUE), .groups = "drop")
Our first way of separating out types of nonprofits is to look at thir subsection codes. We are primarily concerned with 501(c)(3)s, or charitable nonprofits, and all information from here on out will focus on those.
bmf_lou %>%
group_by(SUBSECTION, `short name`) %>%
count() %>%
ungroup() %>%
gt()
| SUBSECTION | short name | n |
|---|---|---|
| 01 | Credit Unions | 4 |
| 02 | Title-holding Organization | 10 |
| 03 | Charitable Nonprofits | 5406 |
| 04 | Civic Associations | 259 |
| 05 | Labor and Agricultural Organizations | 114 |
| 06 | Business Associations | 235 |
| 07 | Social Clubs and Greek Organizations | 248 |
| 08 | Fraternal Societies | 184 |
| 09 | Employee Benefit Trusts | 15 |
| 10 | Fraternal Societies | 53 |
| 12 | Utilities | 16 |
| 13 | Cemeteries | 54 |
| 14 | Credit Unions | 7 |
| 15 | Mutual Insurance | 1 |
| 19 | Veteran Support Organizations | 82 |
| 92 | Trusts | 11 |
501(c)(3)s are further grouped by foundation codes. The biggest distinction this creates is between private foundations and publicly-supported charities. At this stage, we classify organizations into the following groups: * Foundations, * Churches * Schools * Hospital and Medical Research Organizations, * Government Entities * Public Charities
bmf_lou %<>%
mutate(
org_type = case_when(
SUBSECTION != "03" ~ `short name`,
FOUNDATION %in% c(2:4, 21) ~ "Foundation",
FOUNDATION == 10 ~ "Church or Religious Organization",
FOUNDATION %in% c(11, 13, 23) ~ "School",
FOUNDATION %in% c(12, 22, 24) ~ "Hospital or Medical Research Org",
FOUNDATION == 14 ~ "Government Entity",
FOUNDATION %in% 15:17 ~ "Public charity"))
# other FILING_REQ_CD: 1, 2, 3
# FILING_REQ_CD == 13 ~ "Church or Religious Organization",
# FILING_REQ_CD == 14 ~ "Government Entities"))
bmf_lou %>%
filter(SUBSECTION == "03") %>%
group_by(org_type) %>%
count() %>%
ungroup() %>%
gt() %>%
apply_table_settings()
| org_type | n |
|---|---|
| Church or Religious Organization | 943 |
| Foundation | 458 |
| Government Entity | 2 |
| Hospital or Medical Research Org | 51 |
| Public charity | 3872 |
| School | 80 |
The next grouping we can create are NTEE Codes.
output_internal_data %<>%
group_by(EIN) %>%
arrange(desc(return_period)) %>%
filter(row_number() == 1) %>%
ungroup()
output_internal_data %<>%
mutate(across(PYContributionsGrantsAmt:CYSalariesCompEmpBnftPaidAmt, as.numeric)) %>%
mutate(across(employees:TotalFunctionalExpensesGrp, as.numeric)) %>%
mutate(across(FederatedCampaignsAmt:TotalContributionsAmt, ~replace_na(., 0)))
# mutate(revenue_check = TotalContributionsAmt - (FederatedCampaignsAmt + MembershipDuesAmt + FundraisingAmt +
# RelatedOrganizationsAmt + GovernmentGrantsAmt + AllOtherContributionsAmt + NonCashContributionsAmt) == 0)
bmf_lou %<>%
left_join(output_internal_data, by = "EIN")
#
# test <- bmf_lou %>%
# select(EIN, SUBSECTION, org_type, revenue) %>%
# filter(SUBSECTION == "03", org_type == "Public charity") %>%
# left_join(output_internal_data, by = "EIN")
#
# test2 <- bmf_lou %>%
# anti_join(test, by = "EIN") %>%
# left_join(output_internal_data, by = "EIN") %>%
# filter(EIN %in% output_internal_data$EIN)
#
# test_990 <- test %>%
# filter(EIN %in% output_internal_data$EIN)
#
# test_no_990 <- test %>%
# filter(EIN %not_in% output_internal_data$EIN)
ntee_df <- bmf_lou %>%
filter(SUBSECTION == "03", org_type == "Public charity") %>%
group_by(CNPE) %>%
summarize(
Nonprofits = n(),
Revenue = sum(revenue, na.rm = T),
Employees = sum(employees, na.rm = TRUE),
Payroll = sum(CYSalariesCompEmpBnftPaidAmt, na.rm = TRUE)) %>%
ungroup()
ntee_df %>%
downloadthis::download_this(
output_name = "Nonprofits by CNPE Category",
output_extension = ".csv",
button_label = "Download data",
button_type = "warning",
has_icon = TRUE,
icon = "fa fa-save",
csv2 = FALSE)
ntee_df %>%
gt() %>%
fmt_integer(columns = c(Nonprofits, Employees)) %>%
fmt_currency(columns = c(Revenue, Payroll), suffix = T) %>%
apply_table_settings()
| CNPE | Nonprofits | Revenue | Employees | Payroll |
|---|---|---|---|---|
| Animal-Related | 114 | $16.97M | 229 | $5.16M |
| Arts, Culture & Humanities | 371 | $105.25M | 1,893 | $56.32M |
| Civil Rights, Social Action, Advocacy | 34 | $15.83M | 237 | $9.50M |
| Community Improvement, Capacity Building | 155 | $30.58M | 78 | $3.22M |
| Education | 676 | $591.23M | 963 | $23.26M |
| Employment, Job Related | 24 | $87.94M | 3,322 | $43.80M |
| Environmental Quality, Protection and Beautification | 74 | $17.51M | 155 | $4.66M |
| Food, Agriculture, and Nutrition | 45 | $57.64M | 96 | $4.62M |
| Health Services | 320 | $1.77B | 15,499 | $1.11B |
| Human Services | 952 | $756.84M | 15,118 | $372.01M |
| International, Foreign Affairs, and National Security | 51 | $9.20M | 56 | $2.62M |
| Mutual/Membership Benefit | 10 | $7.42M | 0 | $0.00 |
| Public, Society Benefit | 55 | $12.53M | 40 | $2.61M |
| Recreation, Sports, Leisure, Athletics | 290 | $202.41M | 918 | $56.80M |
| Religion Related, Spiritual Development | 279 | $69.47M | 588 | $18.14M |
| Research | 15 | $1.23M | 5 | $588.42K |
| Unknown | 78 | $7.56M | 165 | $2.97M |
| Volunteerism | 150 | $287.29M | 615 | $16.19M |
| NA | 179 | $52.96M | 535 | $13.63M |
bmf_lou %>%
downloadthis::download_this(
output_name = "Business Master File",
output_extension = ".csv",
button_label = "Download data",
button_type = "warning",
has_icon = TRUE,
icon = "fa fa-save",
csv2 = FALSE)
Jefferson County has the largest number of nonprofits, though Harrison County happens to have the most per capita! You can mouse over the map below to see the numbers for each county. Click on the “Table” tab to see the data in a table format and to download the information.
county_pop_df <- glpdata::population_msa_counties %>%
filter(year == 2019,
sex == "total",
race == "total")
county_df <- bmf_lou %>%
group_by(FIPS) %>%
count() %>%
ungroup() %>%
left_join(county_pop_df, by = "FIPS") %>%
left_join(MSA_FIPS_info, by = "FIPS") %>%
transmute(
FIPS,
County = county %p% if_else(str_sub(FIPS, 1, 2) == "18",
", IN", ", KY"),
Nonprofits = n,
`Nonprofits per 1000 Residents` = round(n / population * 1000, 1))
county_df_csi <- bmf_lou %>%
group_by(FIPS) %>%
filter(SUBSECTION == "03", org_type == "Public charity") %>%
summarize(
CSIs = n(),
Revenue = sum(revenue, na.rm = T),
Employees = sum(employees, na.rm = TRUE),
Payroll = sum(CYSalariesCompEmpBnftPaidAmt, na.rm = TRUE))
county_df %<>%
left_join(county_df_csi, by = c("FIPS")) %>%
select(-FIPS)
county_df %<>%
mutate(
county = str_extract(County, "^.*(?= County)")) %>%
left_join(map_msa_lou, by = "county")
pal <- leaflet::colorNumeric(
palette = RColorBrewer::brewer.pal(9, "BuPu"),
domain = range(county_df$`Nonprofits per 1000 Residents`))
labels <- sprintf("%s<br/>%s<br/>%s",
county_df$County,
"Nonprofit HQs per 1,000 residents: " %p% county_df$`Nonprofits per 1000 Residents`,
"Total Nonprofits: " %p% county_df$`Nonprofits`) %>%
lapply(htmltools::HTML)
leaflet(st_as_sf(county_df)) %>%
addPolygons(
color = "#444444", weight = 1,
smoothFactor = 0.5, opacity = 1.0, fillOpacity = 0.5,
fillColor = ~pal(`Nonprofits per 1000 Residents`),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", "font-family" = "Arial", padding = "3px 8px"),
textsize = "15px",
direction = "auto"))
county_df %<>%
select(County, Nonprofits, `Nonprofits per 1000 Residents`,
`Charitable Social Impact Orgs` = CSIs, Revenue, Payroll, Employees)
county_df %>%
downloadthis::download_this(
output_name = "Nonprofits by County",
output_extension = ".csv",
button_label = "Download data",
button_type = "warning",
has_icon = TRUE,
icon = "fa fa-save",
csv2 = FALSE)
county_df %>%
gt() %>%
fmt_integer(columns = c(Nonprofits, `Charitable Social Impact Orgs`, Employees)) %>%
fmt_currency(columns = c(Revenue, Payroll), suffix = T) %>%
tab_header(title = "Nonprofits by county",
subtitle = "") %>%
tab_spanner(
label = "Nonprofits",
columns = vars(Nonprofits, `Nonprofits per 1000 Residents`)) %>%
tab_spanner(
label = "Charitable Social Impact Organizations",
columns = vars(`Charitable Social Impact Orgs`, Revenue, Employees, Payroll)) %>%
cols_label(
Nonprofits = "Number",
`Nonprofits per 1000 Residents` = "Number per 1000 Residents",
`Charitable Social Impact Orgs` = "Number",
Revenue = "Total Revenue") %>%
apply_table_settings()
| Nonprofits by county | ||||||
|---|---|---|---|---|---|---|
| County | Nonprofits | Charitable Social Impact Organizations | ||||
| Number | Number per 1000 Residents | Number | Total Revenue | Employees | Payroll | |
| Clark County, IN | 485 | 4.1 | 247 | $113.47M | 1,455 | $57.26M |
| Floyd County, IN | 373 | 4.8 | 218 | $64.65M | 510 | $15.11M |
| Harrison County, IN | 239 | 6.0 | 135 | $21.81M | 189 | $2.11M |
| Washington County, IN | 139 | 5.0 | 71 | $4.67M | 68 | $762.35K |
| Bullitt County, KY | 243 | 3.0 | 135 | $37.42M | 684 | $18.94M |
| Henry County, KY | 63 | 4.0 | 38 | $3.85M | 12 | $387.39K |
| Jefferson County, KY | 4,554 | 5.9 | 2,678 | $3.81B | 37,061 | $1.64B |
| Oldham County, KY | 310 | 4.7 | 186 | $24.90M | 365 | $9.42M |
| Shelby County, KY | 216 | 4.6 | 123 | $10.18M | 166 | $2.58M |
| Spencer County, KY | 77 | 4.2 | 41 | $510.46K | 2 | $89.87K |
Unsurprisingly (to me, at least) Downtown has the largest concentration of nonprofit headquarters. Since Districts have approximately the same number of residents and I didn’t have the data on hand, I did not include per capita figures.
council_df <- bmf_lou %>%
group_by(district) %>%
count() %>%
ungroup() %>%
filter(!is.na(district)) %>%
transmute(
`Council District` = district,
Nonprofits = n)
council_df %<>%
left_join(map_district, by = c("Council District" = "district"))
council_df_csi <- bmf_lou %>%
group_by(district) %>%
filter(SUBSECTION == "03", org_type == "Public charity") %>%
summarize(
CSIs = n(),
Revenue = sum(revenue, na.rm = T),
Employees = sum(employees, na.rm = TRUE),
Payroll = sum(CYSalariesCompEmpBnftPaidAmt, na.rm = TRUE)) %>%
filter(!is.na(district))
council_df %<>%
left_join(council_df_csi, by = c("Council District" = "district"))
pal <- leaflet::colorNumeric(
palette = RColorBrewer::brewer.pal(9, "BuPu"),
domain = range(council_df$Nonprofits))
labels <- sprintf("%s<br/>%s",
"District " %p% council_df$`Council District`,
"Nonprofit HQs: " %p% council_df$Nonprofits) %>%
lapply(htmltools::HTML)
leaflet(st_as_sf(council_df)) %>%
addPolygons(
color = "#444444", weight = 1,
smoothFactor = 0.5, opacity = 1.0, fillOpacity = 0.5,
fillColor = ~pal(Nonprofits),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", "font-family" = "Arial", padding = "3px 8px"),
textsize = "15px",
direction = "auto"))
council_df %<>%
select(`Council District`, Nonprofits,
`Charitable Social Impact Orgs` = CSIs, Revenue, Payroll, Employees)
council_df %>%
downloadthis::download_this(
output_name = "Nonprofits by Metro Council District",
output_extension = ".csv",
button_label = "Download data",
button_type = "warning",
has_icon = TRUE,
icon = "fa fa-save",
csv2 = FALSE)
council_df %>%
gt() %>%
fmt_integer(columns = c(Nonprofits)) %>%
tab_header(title = "Nonprofits by Council District",
subtitle = "") %>%
fmt_integer(columns = c(Nonprofits, `Charitable Social Impact Orgs`, Employees)) %>%
fmt_currency(columns = c(Revenue, Payroll), suffix = T) %>%
tab_spanner(
label = "Nonprofits",
columns = vars(Nonprofits)) %>%
tab_spanner(
label = "Charitable Social Impact Organizations",
columns = vars(`Charitable Social Impact Orgs`, Revenue, Employees, Payroll)) %>%
cols_label(
Nonprofits = "Number",
`Charitable Social Impact Orgs` = "Number",
Revenue = "Total Revenue") %>%
apply_table_settings()
| Nonprofits by Council District | |||||
|---|---|---|---|---|---|
| Council District | Nonprofits | Charitable Social Impact Organizations | |||
| Number | Number | Total Revenue | Employees | Payroll | |
| 1 | 104 | 62 | $790.97K | 5 | $68.37K |
| 2 | 76 | 35 | $1.83M | 17 | $379.02K |
| 3 | 95 | 55 | $97.66M | 1,574 | $49.36M |
| 4 | 623 | 426 | $1.46B | 14,747 | $869.20M |
| 5 | 133 | 92 | $28.78M | 822 | $12.84M |
| 6 | 207 | 139 | $793.46M | 4,440 | $118.48M |
| 7 | 261 | 87 | $16.72M | 266 | $6.31M |
| 8 | 194 | 128 | $23.80M | 373 | $9.09M |
| 9 | 209 | 135 | $132.22M | 2,242 | $55.95M |
| 10 | 161 | 91 | $61.64M | 828 | $29.10M |
| 11 | 127 | 79 | $3.12M | 37 | $1.15M |
| 12 | 77 | 47 | $7.30M | 139 | $3.83M |
| 13 | 69 | 41 | $14.25M | 1,006 | $22.92M |
| 14 | 53 | 28 | $1.40M | 23 | $244.20K |
| 15 | 88 | 45 | $28.55M | 245 | $7.59M |
| 16 | 174 | 88 | $6.53M | 26 | $715.94K |
| 17 | 116 | 81 | $5.06M | 27 | $1.22M |
| 18 | 212 | 140 | $714.05M | 6,457 | $323.32M |
| 19 | 243 | 139 | $28.15M | 258 | $4.88M |
| 20 | 137 | 87 | $21.73M | 293 | $6.71M |
| 21 | 96 | 48 | $2.59M | 48 | $1.03M |
| 22 | 90 | 59 | $4.19M | 51 | $1.91M |
| 23 | 93 | 40 | $274.25K | 0 | $0.00 |
| 24 | 59 | 33 | $62.02M | 250 | $9.88M |
| 25 | 70 | 33 | $1.39M | 10 | $211.91K |
| 26 | 121 | 77 | $130.63M | 1,245 | $58.77M |
nh_pop_df <- glpdata::population_nh %>%
filter(year == 2019,
sex == "total",
race == "total") %>%
select(neighborhood, population)
nh_df <- bmf_lou %>%
group_by(neighborhood) %>%
count() %>%
ungroup() %>%
left_join(nh_pop_df, by = "neighborhood") %>%
transmute(
Neighborhood = neighborhood,
Nonprofits = n,
`Nonprofits per 1000 Residents` = round(n / population * 1000, 1))
nh_df_csi <- bmf_lou %>%
group_by(neighborhood) %>%
filter(SUBSECTION == "03", org_type == "Public charity") %>%
summarize(
CSIs = n(),
Revenue = sum(revenue, na.rm = T),
Employees = sum(employees, na.rm = TRUE),
Payroll = sum(CYSalariesCompEmpBnftPaidAmt, na.rm = TRUE)) %>%
filter(!is.na(neighborhood))
nh_df %<>%
left_join(nh_df_csi, by = c("Neighborhood" = "neighborhood")) %>%
filter(Neighborhood != "Airport")
make_map(rename(nh_df, neighborhood = Neighborhood),
"Nonprofits",
hover_name = "Nonprofit HQ",
units = "none")
nh_df %<>%
rename(`Charitable Social Impact Orgs` = CSIs)
nh_df %>%
downloadthis::download_this(
output_name = "Nonprofits by Neighborhood",
output_extension = ".csv",
button_label = "Download data",
button_type = "warning",
has_icon = TRUE,
icon = "fa fa-save",
csv2 = FALSE)
nh_df %>%
gt() %>%
tab_header(title = "Nonprofits by Neighborhood",
subtitle = "") %>%
fmt_integer(columns = c(Nonprofits, `Charitable Social Impact Orgs`, Employees)) %>%
fmt_currency(columns = c(Revenue, Payroll), suffix = T) %>%
tab_spanner(
label = "Nonprofits",
columns = vars(Nonprofits)) %>%
tab_spanner(
label = "Charitable Social Impact Organizations",
columns = vars(`Charitable Social Impact Orgs`, Revenue, Employees, Payroll)) %>%
cols_label(
Nonprofits = "Number",
`Charitable Social Impact Orgs` = "Number",
Revenue = "Total Revenue") %>%
apply_table_settings()
| Nonprofits by Neighborhood | ||||||
|---|---|---|---|---|---|---|
| Neighborhood | Nonprofits | Nonprofits per 1000 Residents | Charitable Social Impact Organizations | |||
| Number | Number | Total Revenue | Employees | Payroll | ||
| Algonquin-Park Hill-Park Duvalle | 44 | 2.8 | 24 | $20.54M | 286 | $13.33M |
| Buechel-Newburg-Indian Trail | 115 | 3.3 | 48 | $10.66M | 132 | $3.45M |
| Butchertown-Clifton-Crescent Hill | 147 | 6.5 | 101 | $106.61M | 2,068 | $49.78M |
| California-Parkland | 69 | 7.8 | 42 | $13.03M | 140 | $4.73M |
| Chickasaw-Shawnee | 86 | 4.8 | 59 | $6.23M | 81 | $2.69M |
| Downtown-Old Louisville-University | 505 | 28.9 | 343 | $1.45B | 12,214 | $412.98M |
| Fairdale | 17 | 1.2 | 12 | $6.56M | 763 | $4.93M |
| Fern Creek | 67 | 2.4 | 43 | $4.06M | 51 | $1.91M |
| Floyd's Fork | 228 | 4.6 | 126 | $8.78M | 38 | $1.28M |
| Germantown | 77 | 5.9 | 46 | $16.69M | 229 | $7.92M |
| Highlands | 151 | 7.1 | 88 | $25.06M | 423 | $10.41M |
| Highview-Okolona | 170 | 2.6 | 86 | $68.00M | 486 | $27.58M |
| J-Town | 318 | 5.8 | 200 | $207.56M | 3,475 | $126.03M |
| Northeast Jefferson | 736 | 6.2 | 393 | $578.44M | 3,858 | $216.99M |
| Phoenix Hill-Smoketown-Shelby Park | 163 | 15.9 | 119 | $749.81M | 6,123 | $554.55M |
| Pleasure Ridge Park | 88 | 2.1 | 55 | $7.52M | 155 | $4.20M |
| Portland | 59 | 5.8 | 42 | $20.22M | 610 | $8.46M |
| Russell | 52 | 5.0 | 35 | $33.93M | 647 | $12.32M |
| Shively | 116 | 3.7 | 66 | $13.34M | 85 | $3.73M |
| South Central Louisville | 79 | 3.2 | 42 | $93.22M | 1,513 | $41.44M |
| South Louisville | 154 | 3.0 | 85 | $4.32M | 38 | $1.26M |
| Southeast Louisville | 238 | 4.2 | 160 | $164.87M | 1,882 | $79.33M |
| St. Matthews | 142 | 6.8 | 67 | $35.58M | 115 | $5.78M |
| Valley Station | 60 | 2.1 | 31 | $1.41M | 17 | $78.82K |
(The table here is pretty ungainly to look at in my opinion, but you can download it.)
zip_pop_df <- glpdata::population_zip %>%
filter(year == 2019,
sex == "total",
race == "total") %>%
select(zip, population_total) %>%
distinct()
zip_df <- bmf_lou %>%
group_by(zip) %>%
count() %>%
ungroup() %>%
left_join(zip_pop_df, by = "zip") %>%
transmute(
`Zip Code` = zip,
Nonprofits = n,
`Nonprofits per 1000 Residents` =
round(n / population_total * 1000, 1))
zip_df %>%
downloadthis::download_this(
output_name = "Nonprofits by Zip Code",
output_extension = ".csv",
button_label = "Download data",
button_type = "warning",
has_icon = TRUE,
icon = "fa fa-save",
csv2 = FALSE)
# zip_df %>%
# gt() %>%
# fmt_integer(columns = c(Nonprofits)) %>%
# tab_header(title = "Nonprofits by Zip Code",
# subtitle = "") %>%
# apply_table_settings()
make_map(rename(zip_df, zip = `Zip Code`),
"Nonprofits",
hover_name = "Nonprofit HQ",
units = "none")
cuts <- c(0, 50, 100, 200, 500, 1000, 2000, 5000, Inf) * 1000
cut_names <- c("<$50k", "$50k-$100k", "$100k-$200k", "$200k-$500k", "$500k-$1M",
"$1M-$2M", "$2M-$5M", "$5M+")
cnpe_levels <- bmf_lou %>%
filter(SUBSECTION == "03", org_type == "Public charity") %>%
mutate(revenue_cat = findInterval(revenue, cuts)) %>%
filter(revenue_cat != 0) %>%
mutate(revenue_name = factor(cut_names[revenue_cat], ordered = T, levels = cut_names))
cnpe_df <- cnpe_levels %>%
group_by(revenue_name) %>%
summarize(
`Charitable Social Impact Orgs` = n(),
Revenue = sum(revenue, na.rm = T),
Employees = sum(employees, na.rm = TRUE),
Payroll = sum(CYSalariesCompEmpBnftPaidAmt, na.rm = TRUE),
.groups = "drop") %>%
rename(`Membership Level` = revenue_name)
cnpe_df %>%
downloadthis::download_this(
output_name = "Nonprofits by CNPE Membership Level",
output_extension = ".csv",
button_label = "Download data",
button_type = "warning",
has_icon = TRUE,
icon = "fa fa-save",
csv2 = FALSE)
cnpe_df %>%
gt() %>%
tab_header(title = "Nonprofits by CNPE Membership Level",
subtitle = "") %>%
fmt_integer(columns = c(`Charitable Social Impact Orgs`, Employees)) %>%
fmt_currency(columns = c(Revenue, Payroll), suffix = T) %>%
tab_spanner(
label = "Charitable Social Impact Organizations",
columns = vars(`Charitable Social Impact Orgs`, Revenue, Employees, Payroll)) %>%
cols_label(
`Charitable Social Impact Orgs` = "Number",
Revenue = "Total Revenue") %>%
apply_table_settings()
| Nonprofits by CNPE Membership Level | ||||
|---|---|---|---|---|
| Membership Level | Charitable Social Impact Organizations | |||
| Number | Total Revenue | Employees | Payroll | |
| <$50k | 2,420 | $6.20M | 43 | $1.14M |
| $50k-$100k | 204 | $14.98M | 95 | $2.35M |
| $100k-$200k | 191 | $27.29M | 150 | $4.61M |
| $200k-$500k | 233 | $74.37M | 559 | $16.86M |
| $500k-$1M | 105 | $74.79M | 745 | $25.04M |
| $1M-$2M | 76 | $108.69M | 1,978 | $43.44M |
| $2M-$5M | 83 | $265.71M | 3,883 | $109.29M |
| $5M+ | 91 | $3.52B | 33,059 | $1.54B |
There are a couple of data issues here. The first is with very small organizations, where I need to figure out how to add them in. The 990EZ doesn’t include every revenue category, so I’m having trouble getting all of the numbers to square up. The second data issue is with a program service revenue outlier, which you can clearly see throws off the data for the $2M-$5M category.
rev_source_df <- cnpe_levels %>%
filter(filing_type == "990") %>%
group_by(revenue_name) %>%
summarize(across(c(CYContributionsGrantsAmt,
CYProgramServiceRevenueAmt,
GovernmentGrantsAmt,
CYInvestmentIncomeAmt,
CYOtherRevenueAmt,
revenue),
~ sum(., na.rm=T)),
.groups = "drop") %>%
mutate(contributions = CYContributionsGrantsAmt - GovernmentGrantsAmt) %>%
select(`Membership Level` = revenue_name,
contributions,
GovernmentGrantsAmt,
CYProgramServiceRevenueAmt,
CYInvestmentIncomeAmt,
CYOtherRevenueAmt,
revenue)
rev_source_df %>%
gt() %>%
tab_header(title = "Revenue Sources by CNPE Membership Level",
subtitle = "") %>%
fmt_currency(columns =
c(contributions,
GovernmentGrantsAmt,
CYProgramServiceRevenueAmt,
CYInvestmentIncomeAmt,
CYOtherRevenueAmt,
revenue), suffix = T) %>%
cols_label(
contributions = "Contributions and Grants",
GovernmentGrantsAmt = "Government Grants",
CYProgramServiceRevenueAmt = "Program Service Revenue",
CYInvestmentIncomeAmt = "Investment Income",
CYOtherRevenueAmt = "Other",
revenue = "Total Revenue") %>%
apply_table_settings()
| Revenue Sources by CNPE Membership Level | ||||||
|---|---|---|---|---|---|---|
| Membership Level | Contributions and Grants | Government Grants | Program Service Revenue | Investment Income | Other | Total Revenue |
| <$50k | $477.04K | $0.00 | $250.64K | $289.78K | $144.72K | $938.20K |
| $50k-$100k | $8.21M | $784.57K | $3.45M | −$22.00K | $800.84K | $14.89M |
| $100k-$200k | $10.53M | $1.02M | $6.88M | $607.85K | $1.10M | $27.14M |
| $200k-$500k | $32.82M | $3.94M | $11.89M | $1.86M | $6.22M | $74.37M |
| $500k-$1M | $33.91M | $7.20M | $18.51M | $2.53M | $1.46M | $73.24M |
| $1M-$2M | $63.49M | $19.76M | $32.04M | $4.85M | $5.59M | $104.61M |
| $2M-$5M | $93.18M | $44.13M | $2.02B | $56.38M | $17.18M | $263.59M |
| $5M+ | $519.16M | $348.65M | $2.15B | $147.49M | $98.31M | $3.52B |
rev_source_df %>%
mutate(across(contributions:revenue,
~. / revenue)) %>%
gt() %>%
tab_header(title = "Revenue Sources by CNPE Membership Level",
subtitle = "") %>%
fmt_percent(columns =
c(contributions,
GovernmentGrantsAmt,
CYProgramServiceRevenueAmt,
CYInvestmentIncomeAmt,
CYOtherRevenueAmt,
revenue)) %>%
cols_label(
contributions = "Contributions and Grants",
GovernmentGrantsAmt = "Government Grants",
CYProgramServiceRevenueAmt = "Program Service Revenue",
CYInvestmentIncomeAmt = "Investment Income",
CYOtherRevenueAmt = "Other",
revenue = "Total Revenue") %>%
apply_table_settings()
| Revenue Sources by CNPE Membership Level | ||||||
|---|---|---|---|---|---|---|
| Membership Level | Contributions and Grants | Government Grants | Program Service Revenue | Investment Income | Other | Total Revenue |
| <$50k | 50.85% | 0.00% | 26.71% | 30.89% | 15.43% | 100.00% |
| $50k-$100k | 55.14% | 5.27% | 23.17% | −0.15% | 5.38% | 100.00% |
| $100k-$200k | 38.82% | 3.74% | 25.34% | 2.24% | 4.05% | 100.00% |
| $200k-$500k | 44.14% | 5.30% | 16.00% | 2.50% | 8.36% | 100.00% |
| $500k-$1M | 46.30% | 9.83% | 25.27% | 3.45% | 2.00% | 100.00% |
| $1M-$2M | 60.70% | 18.89% | 30.63% | 4.64% | 5.35% | 100.00% |
| $2M-$5M | 35.35% | 16.74% | 767.41% | 21.39% | 6.52% | 100.00% |
| $5M+ | 14.76% | 9.91% | 61.05% | 4.19% | 2.79% | 100.00% |